#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Import required libraries
import xlwt
import requests
from lxml import etree
import time


# Module-level accumulator: get_info() appends one
# [title, author, style, complete, introduce, word] row per scraped entry.
all_info_list = []


# 定义获取爬虫信息的函数
def get_info(url):
    """Scrape one listing page and append one row per novel to all_info_list.

    Each row is [title, author, style, complete, introduce, word].
    Entries missing any expected field are skipped instead of crashing.

    :param url: URL of the listing page to fetch.
    """
    # Timeout so a stalled server cannot hang the whole crawl forever.
    html = requests.get(url, timeout=10)
    selector = etree.HTML(html.text)

    # Each <li> under this <ul> is one novel entry.
    # NOTE(review): this selector matches qidian.com's listing markup, but the
    # URLs built in __main__ point at rkpass.cn — confirm which site is intended.
    infos = selector.xpath('//ul[@class="all-img-list cf"]/li')

    for info in infos:
        try:
            title = info.xpath('div[2]/h4/a/text()')[0]
            author = info.xpath('div[2]/p[1]/a[1]/text()')[0]
            style_1 = info.xpath('div[2]/p[1]/a[2]/text()')[0]
            style_2 = info.xpath('div[2]/p[1]/a[3]/text()')[0]
            complete = info.xpath('div[2]/p[1]/span/text()')[0]
            introduce = info.xpath('div[2]/p[2]/text()')[0].strip()
            word = info.xpath('div[2]/p[3]/span/text()')[0].strip('万字')
        except IndexError:
            # Malformed or partial entry: skip it rather than abort the page.
            continue
        style = style_1 + '·' + style_2
        all_info_list.append([title, author, style, complete, introduce, word])

    # Rate-limit between HTTP requests: pause once per page.
    # (The original slept inside the item loop, stalling 1 s per list entry.)
    time.sleep(1)


# 程序主入口
# Script entry point
if __name__ == '__main__':

    # NOTE(review): these URLs point at rkpass.cn while get_info's XPath
    # targets qidian.com listing markup — confirm the intended site.
    urls = ['http://www.rkpass.cn/i/tk_timu/4_699_{}_xuanze.html'.format(i)
            for i in range(1, 75)]
    # Scrape every page; each call appends rows to all_info_list.
    for url in urls:
        print(url)
        get_info(url)

    # Persist the scraped rows to an Excel workbook. This section was
    # previously commented out, so everything scraped was silently discarded.
    header = ['title', 'author', 'style', 'complete', 'introduce', 'word']
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    # Header row (row 0).
    for col, name in enumerate(header):
        sheet.write(0, col, name)
    # One row per scraped entry, starting below the header.
    for row, record in enumerate(all_info_list, start=1):
        for col, value in enumerate(record):
            sheet.write(row, col, value)
    book.save('xiaoshuo.xls')